/*
 * Copyright (c) 2020 Björn Ottosson
 * Copyright (c) 2022 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/common.h"
#include "palette.h"

#define K ((1 << 16) - 1)
#define K2 ((int64_t)K*K)
#define P ((1 << 9) - 1)

/**
 * Table mapping formula:
 *   f(x) = x < 0.04045 ? x/12.92 : ((x+0.055)/1.055)^2.4  (sRGB EOTF)
 * Where x is the normalized index in the table and f(x) the value in the table.
 * f(x) is remapped to [0;K] and rounded.
 */
static const uint16_t srgb2linear[256] = {
    0x0000, 0x0014, 0x0028, 0x003c, 0x0050, 0x0063, 0x0077, 0x008b,
    0x009f, 0x00b3, 0x00c7, 0x00db, 0x00f1, 0x0108, 0x0120, 0x0139,
    0x0154, 0x016f, 0x018c, 0x01ab, 0x01ca, 0x01eb, 0x020e, 0x0232,
    0x0257, 0x027d, 0x02a5, 0x02ce, 0x02f9, 0x0325, 0x0353, 0x0382,
    0x03b3, 0x03e5, 0x0418, 0x044d, 0x0484, 0x04bc, 0x04f6, 0x0532,
    0x056f, 0x05ad, 0x05ed, 0x062f, 0x0673, 0x06b8, 0x06fe, 0x0747,
    0x0791, 0x07dd, 0x082a, 0x087a, 0x08ca, 0x091d, 0x0972, 0x09c8,
    0x0a20, 0x0a79, 0x0ad5, 0x0b32, 0x0b91, 0x0bf2, 0x0c55, 0x0cba,
    0x0d20, 0x0d88, 0x0df2, 0x0e5e, 0x0ecc, 0x0f3c, 0x0fae, 0x1021,
    0x1097, 0x110e, 0x1188, 0x1203, 0x1280, 0x1300, 0x1381, 0x1404,
    0x1489, 0x1510, 0x159a, 0x1625, 0x16b2, 0x1741, 0x17d3, 0x1866,
    0x18fb, 0x1993, 0x1a2c, 0x1ac8, 0x1b66, 0x1c06, 0x1ca7, 0x1d4c,
    0x1df2, 0x1e9a, 0x1f44, 0x1ff1, 0x20a0, 0x2150, 0x2204, 0x22b9,
    0x2370, 0x242a, 0x24e5, 0x25a3, 0x2664, 0x2726, 0x27eb, 0x28b1,
    0x297b, 0x2a46, 0x2b14, 0x2be3, 0x2cb6, 0x2d8a, 0x2e61, 0x2f3a,
    0x3015, 0x30f2, 0x31d2, 0x32b4, 0x3399, 0x3480, 0x3569, 0x3655,
    0x3742, 0x3833, 0x3925, 0x3a1a, 0x3b12, 0x3c0b, 0x3d07, 0x3e06,
    0x3f07, 0x400a, 0x4110, 0x4218, 0x4323, 0x4430, 0x453f, 0x4651,
    0x4765, 0x487c, 0x4995, 0x4ab1, 0x4bcf, 0x4cf0, 0x4e13, 0x4f39,
    0x5061, 0x518c, 0x52b9, 0x53e9, 0x551b, 0x5650, 0x5787, 0x58c1,
    0x59fe, 0x5b3d, 0x5c7e, 0x5dc2, 0x5f09, 0x6052, 0x619e, 0x62ed,
    0x643e, 0x6591, 0x66e8, 0x6840, 0x699c, 0x6afa, 0x6c5b, 0x6dbe,
    0x6f24, 0x708d, 0x71f8, 0x7366, 0x74d7, 0x764a, 0x77c0, 0x7939,
    0x7ab4, 0x7c32, 0x7db3, 0x7f37, 0x80bd, 0x8246, 0x83d1, 0x855f,
    0x86f0, 0x8884, 0x8a1b, 0x8bb4, 0x8d50, 0x8eef, 0x9090, 0x9235,
    0x93dc, 0x9586, 0x9732, 0x98e2, 0x9a94, 0x9c49, 0x9e01, 0x9fbb,
    0xa179, 0xa339, 0xa4fc, 0xa6c2, 0xa88b, 0xaa56, 0xac25, 0xadf6,
    0xafca, 0xb1a1, 0xb37b, 0xb557, 0xb737, 0xb919, 0xbaff, 0xbce7,
    0xbed2, 0xc0c0, 0xc2b1, 0xc4a5, 0xc69c, 0xc895, 0xca92, 0xcc91,
    0xce94, 0xd099, 0xd2a1, 0xd4ad, 0xd6bb, 0xd8cc, 0xdae0, 0xdcf7,
    0xdf11, 0xe12e, 0xe34e, 0xe571, 0xe797, 0xe9c0, 0xebec, 0xee1b,
    0xf04d, 0xf282, 0xf4ba, 0xf6f5, 0xf933, 0xfb74, 0xfdb8, 0xffff,
};

/**
 * Table mapping formula:
 *   f(x) = x < 0.0031308 ? x*12.92 : 1.055*x^(1/2.4)-0.055  (sRGB OETF)
 * Where x is the normalized index in the table and f(x) the value in the table.
 * f(x) is remapped to [0;0xff] and rounded.
 *
 * Since a 16-bit table is too large, we reduce its precision to 9-bit.
 */
static const uint8_t linear2srgb[P + 1] = {
    0x00, 0x06, 0x0d, 0x12, 0x16, 0x19, 0x1c, 0x1f, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30,
    0x32, 0x33, 0x35, 0x36, 0x38, 0x39, 0x3b, 0x3c, 0x3d, 0x3e, 0x40, 0x41, 0x42, 0x43, 0x45, 0x46,
    0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
    0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x5f, 0x60, 0x61, 0x62, 0x62,
    0x63, 0x64, 0x65, 0x65, 0x66, 0x67, 0x67, 0x68, 0x69, 0x6a, 0x6a, 0x6b, 0x6c, 0x6c, 0x6d, 0x6e,
    0x6e, 0x6f, 0x6f, 0x70, 0x71, 0x71, 0x72, 0x73, 0x73, 0x74, 0x74, 0x75, 0x76, 0x76, 0x77, 0x77,
    0x78, 0x79, 0x79, 0x7a, 0x7a, 0x7b, 0x7b, 0x7c, 0x7d, 0x7d, 0x7e, 0x7e, 0x7f, 0x7f, 0x80, 0x80,
    0x81, 0x81, 0x82, 0x82, 0x83, 0x84, 0x84, 0x85, 0x85, 0x86, 0x86, 0x87, 0x87, 0x88, 0x88, 0x89,
    0x89, 0x8a, 0x8a, 0x8b, 0x8b, 0x8c, 0x8c, 0x8c, 0x8d, 0x8d, 0x8e, 0x8e, 0x8f, 0x8f, 0x90, 0x90,
    0x91, 0x91, 0x92, 0x92, 0x93, 0x93, 0x93, 0x94, 0x94, 0x95, 0x95, 0x96, 0x96, 0x97, 0x97, 0x97,
    0x98, 0x98, 0x99, 0x99, 0x9a, 0x9a, 0x9a, 0x9b, 0x9b, 0x9c, 0x9c, 0x9c, 0x9d, 0x9d, 0x9e, 0x9e,
    0x9f, 0x9f, 0x9f, 0xa0, 0xa0, 0xa1, 0xa1, 0xa1, 0xa2, 0xa2, 0xa3, 0xa3, 0xa3, 0xa4, 0xa4, 0xa5,
    0xa5, 0xa5, 0xa6, 0xa6, 0xa6, 0xa7, 0xa7, 0xa8, 0xa8, 0xa8, 0xa9, 0xa9, 0xa9, 0xaa, 0xaa, 0xab,
    0xab, 0xab, 0xac, 0xac, 0xac, 0xad, 0xad, 0xae, 0xae, 0xae, 0xaf, 0xaf, 0xaf, 0xb0, 0xb0, 0xb0,
    0xb1, 0xb1, 0xb1, 0xb2, 0xb2, 0xb3, 0xb3, 0xb3, 0xb4, 0xb4, 0xb4, 0xb5, 0xb5, 0xb5, 0xb6, 0xb6,
    0xb6, 0xb7, 0xb7, 0xb7, 0xb8, 0xb8, 0xb8, 0xb9, 0xb9, 0xb9, 0xba, 0xba, 0xba, 0xbb, 0xbb, 0xbb,
    0xbc, 0xbc, 0xbc, 0xbd, 0xbd, 0xbd, 0xbe, 0xbe, 0xbe, 0xbf, 0xbf, 0xbf, 0xc0, 0xc0, 0xc0, 0xc1,
    0xc1, 0xc1, 0xc1, 0xc2, 0xc2, 0xc2, 0xc3, 0xc3, 0xc3, 0xc4, 0xc4, 0xc4, 0xc5, 0xc5, 0xc5, 0xc6,
    0xc6, 0xc6, 0xc6, 0xc7, 0xc7, 0xc7, 0xc8, 0xc8, 0xc8, 0xc9, 0xc9, 0xc9, 0xc9, 0xca, 0xca, 0xca,
    0xcb, 0xcb, 0xcb, 0xcc, 0xcc, 0xcc, 0xcc, 0xcd, 0xcd, 0xcd, 0xce, 0xce, 0xce, 0xce, 0xcf, 0xcf,
    0xcf, 0xd0, 0xd0, 0xd0, 0xd0, 0xd1, 0xd1, 0xd1, 0xd2, 0xd2, 0xd2, 0xd2, 0xd3, 0xd3, 0xd3, 0xd4,
    0xd4, 0xd4, 0xd4, 0xd5, 0xd5, 0xd5, 0xd6, 0xd6, 0xd6, 0xd6, 0xd7, 0xd7, 0xd7, 0xd7, 0xd8, 0xd8,
    0xd8, 0xd9, 0xd9, 0xd9, 0xd9, 0xda, 0xda, 0xda, 0xda, 0xdb, 0xdb, 0xdb, 0xdc, 0xdc, 0xdc, 0xdc,
    0xdd, 0xdd, 0xdd, 0xdd, 0xde, 0xde, 0xde, 0xde, 0xdf, 0xdf, 0xdf, 0xe0, 0xe0, 0xe0, 0xe0, 0xe1,
    0xe1, 0xe1, 0xe1, 0xe2, 0xe2, 0xe2, 0xe2, 0xe3, 0xe3, 0xe3, 0xe3, 0xe4, 0xe4, 0xe4, 0xe4, 0xe5,
    0xe5, 0xe5, 0xe5, 0xe6, 0xe6, 0xe6, 0xe6, 0xe7, 0xe7, 0xe7, 0xe7, 0xe8, 0xe8, 0xe8, 0xe8, 0xe9,
    0xe9, 0xe9, 0xe9, 0xea, 0xea, 0xea, 0xea, 0xeb, 0xeb, 0xeb, 0xeb, 0xec, 0xec, 0xec, 0xec, 0xed,
    0xed, 0xed, 0xed, 0xee, 0xee, 0xee, 0xee, 0xef, 0xef, 0xef, 0xef, 0xef, 0xf0, 0xf0, 0xf0, 0xf0,
    0xf1, 0xf1, 0xf1, 0xf1, 0xf2, 0xf2, 0xf2, 0xf2, 0xf3, 0xf3, 0xf3, 0xf3, 0xf3, 0xf4, 0xf4, 0xf4,
    0xf4, 0xf5, 0xf5, 0xf5, 0xf5, 0xf6, 0xf6, 0xf6, 0xf6, 0xf6, 0xf7, 0xf7, 0xf7, 0xf7, 0xf8, 0xf8,
    0xf8, 0xf8, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xfa, 0xfa, 0xfa, 0xfa, 0xfb, 0xfb, 0xfb, 0xfb, 0xfb,
    0xfc, 0xfc, 0xfc, 0xfc, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfe, 0xfe, 0xfe, 0xfe, 0xff, 0xff, 0xff,
};

uint8_t ff_linear_int_to_srgb_u8(int32_t x)
{
    if (x <= 0) {
        return 0;
    } else if (x >= K) {
        return 0xff;
    } else {
        const int32_t xP = x * P;
        const int32_t i = xP / K;
        const int32_t m = xP % K;
        const int32_t y0 = linear2srgb[i];
        const int32_t y1 = linear2srgb[i + 1];
        return (m * (y1 - y0) + K/2) / K + y0;
    }
}

/* Integer cube root, working only within [0;1] */
static int32_t cbrt01_int(int32_t x)
{
    int64_t u;

    /* Approximation curve is for the [0;1] range */
    if (x <= 0) return 0;
    if (x >= K) return K;

    /*
     * Initial approximation: x³ - 2.19893x² + 2.01593x + 0.219407
     *
     * We are not using any rounding here since the precision is not important
     * at this stage and it would require the more expensive rounding function
     * that deals with negative numbers.
     */
    u = x*(x*(x + -144107LL) / K + 132114LL) / K + 14379LL;

    /*
     * Refine with 2 Halley iterations:
     *   uₙ₊₁ = uₙ-2f(uₙ)f'(uₙ)/(2f'(uₙ)²-f(uₙ)f"(uₙ))
     *        = uₙ(2x+uₙ³)/(x+2uₙ³)
     *
     * Note: u is not expected to be < 0, so we can use the (a+b/2)/b rounding.
     */
    for (int i = 0; i < 2; i++) {
        const int64_t u3 = u*u*u;
        const int64_t den = x + (2*u3 + K2/2) / K2;
        u = (u * (2*x + (u3 + K2/2) / K2) + den/2) / den;
    }

    return u;
}

static int64_t div_round64(int64_t a, int64_t b) { return (a^b)<0 ? (a-b/2)/b : (a+b/2)/b; }

struct Lab ff_srgb_u8_to_oklab_int(uint32_t srgb)
{
    const int32_t r = (int32_t)srgb2linear[srgb >> 16 & 0xff];
    const int32_t g = (int32_t)srgb2linear[srgb >>  8 & 0xff];
    const int32_t b = (int32_t)srgb2linear[srgb       & 0xff];

    // Note: lms can actually be slightly over K due to rounded coefficients
    const int32_t l = (27015LL*r + 35149LL*g +  3372LL*b + K/2) / K;
    const int32_t m = (13887LL*r + 44610LL*g +  7038LL*b + K/2) / K;
    const int32_t s = ( 5787LL*r + 18462LL*g + 41286LL*b + K/2) / K;

    const int32_t l_ = cbrt01_int(l);
    const int32_t m_ = cbrt01_int(m);
    const int32_t s_ = cbrt01_int(s);

    const struct Lab ret = {
        .L = div_round64( 13792LL*l_ +  52010LL*m_ -   267LL*s_, K),
        .a = div_round64(129628LL*l_ - 159158LL*m_ + 29530LL*s_, K),
        .b = div_round64(  1698LL*l_ +  51299LL*m_ - 52997LL*s_, K),
    };

    return ret;
}

uint32_t ff_oklab_int_to_srgb_u8(struct Lab c)
{
    const int64_t l_ = c.L + div_round64(25974LL * c.a, K) + div_round64(14143LL * c.b, K);
    const int64_t m_ = c.L + div_round64(-6918LL * c.a, K) + div_round64(-4185LL * c.b, K);
    const int64_t s_ = c.L + div_round64(-5864LL * c.a, K) + div_round64(-84638LL * c.b, K);

    const int32_t l = l_*l_*l_ / K2;
    const int32_t m = m_*m_*m_ / K2;
    const int32_t s = s_*s_*s_ / K2;

    const uint8_t r = ff_linear_int_to_srgb_u8((267169LL * l + -216771LL * m + 15137LL * s + K/2) / K);
    const uint8_t g = ff_linear_int_to_srgb_u8((-83127LL * l + 171030LL * m + -22368LL * s + K/2) / K);
    const uint8_t b = ff_linear_int_to_srgb_u8((-275LL * l + -46099LL * m + 111909LL * s + K/2) / K);

    return r<<16 | g<<8 | b;
}

uint32_t ff_lowbias32(uint32_t x)
{
    x ^= x >> 16;
    x *= 0x7feb352d;
    x ^= x >> 15;
    x *= 0x846ca68b;
    x ^= x >> 16;
    return x;
}
